In [ ]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
#set background color grey
sns.set_theme(style="darkgrid")
In [ ]:
df = pd.read_csv("all_turns_2.csv")
df.drop(columns=['Unnamed: 0'], inplace=True)
df['turn_duration'] = 0.2*(df['end_idx'].astype('float') - df['start_idx'].astype('float'))
df.describe().T
Out[Â ]:
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| participant_id | 210.0 | 1838.609524 | 619.424474 | 407.000000 | 2102.000000 | 2105.000000 | 2107.000000 | 2111.000000 |
| path_num | 210.0 | 2.247619 | 0.735705 | 1.000000 | 2.000000 | 2.000000 | 3.000000 | 3.000000 |
| turn_num | 210.0 | 2.642857 | 1.785393 | 1.000000 | 1.000000 | 2.000000 | 3.750000 | 10.000000 |
| start_idx | 210.0 | 539.523810 | 430.918594 | 50.000000 | 246.000000 | 435.500000 | 666.000000 | 2199.000000 |
| end_idx | 210.0 | 590.380952 | 430.636602 | 83.000000 | 296.250000 | 483.000000 | 712.000000 | 2264.000000 |
| walking_direction_lag | 210.0 | -3.980952 | 24.546014 | -151.000000 | -12.000000 | -1.000000 | 6.000000 | 107.000000 |
| walking_direction_base_corr | 210.0 | 0.075067 | 0.415642 | -0.857578 | -0.315052 | 0.093226 | 0.413633 | 0.941918 |
| walking_direction_lagged_corr | 210.0 | 0.470502 | 0.163364 | 0.122957 | 0.348448 | 0.442860 | 0.570997 | 0.955078 |
| walking_direction_dtw | 210.0 | 43.989552 | 27.188144 | 4.239983 | 25.985486 | 36.686731 | 52.961837 | 162.955230 |
| speeds_lag | 210.0 | -1.814286 | 15.690145 | -69.000000 | -8.000000 | -1.000000 | 4.000000 | 62.000000 |
| speeds_base_corr | 210.0 | 0.181225 | 0.343211 | -0.797566 | -0.075247 | 0.192109 | 0.446230 | 0.881916 |
| speeds_lagged_corr | 210.0 | 0.490001 | 0.152821 | 0.171078 | 0.372479 | 0.472339 | 0.600638 | 0.881916 |
| speeds_dtw | 210.0 | 35.361913 | 18.733911 | 10.222585 | 23.357122 | 29.799045 | 42.693865 | 141.492438 |
| mean_distance | 210.0 | 2.396365 | 1.569543 | 0.336612 | 1.400366 | 2.112387 | 2.974131 | 13.639054 |
| mean_speed_difference | 210.0 | 0.372175 | 0.154074 | 0.086809 | 0.263861 | 0.340143 | 0.447599 | 0.922073 |
| mean_walking_direction_difference | 210.0 | 62.257972 | 19.707774 | 14.479058 | 48.836197 | 62.728343 | 77.249692 | 120.316045 |
| mean_pace_asymmetry | 210.0 | 0.436723 | 0.131052 | 0.110604 | 0.347882 | 0.428110 | 0.505151 | 0.876306 |
| turn_duration | 210.0 | 10.171429 | 6.216412 | 5.000000 | 5.800000 | 8.000000 | 11.950000 | 46.200000 |
In [ ]:
# box plot turn duration
sns.boxplot(x=df['turn_duration'])
plt.title('Box plot of turn duration')
plt.show()
In [ ]:
df['normalized_walking_direction_dtw'] = df['walking_direction_dtw'] / (df['turn_duration'] / 0.2)
df['normalized_speeds_dtw'] = df['speeds_dtw'] / (df['turn_duration'] / 0.2)
In [ ]:
df['abs_walking_direction_lag'] = df['walking_direction_lag'].abs()
df['abs_speeds_lag'] = df['speeds_lag'].abs()
In [ ]:
relevant_features = [
'turn_duration',
'mean_distance',
'mean_pace_asymmetry',
'walking_direction_lag',
'abs_walking_direction_lag',
'walking_direction_dtw',
'normalized_walking_direction_dtw',
# 'walking_direction_base_corr',
'walking_direction_lagged_corr',
# 'mean_walking_direction_difference',
'speeds_lag',
'abs_speeds_lag',
'speeds_dtw',
'normalized_speeds_dtw',
# 'speeds_base_corr',
'speeds_lagged_corr',
# 'mean_speed_difference',
]
In [ ]:
corr = df[relevant_features].corr(method='pearson',numeric_only=True)
mask = np.abs(corr) < 0.3
plt.figure(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt=".2f", mask=mask)
# make it bigger
plt.title(f"Metrics Correlation Matrix - Original Data (n={len(df)})")
plt.show()
In [ ]:
# find rows with the same participant_id and path_num with overlapping start_idx and end_idx
df['overlapping'] = False
for index, row in df.iterrows():
if len(df[(df['participant_id'] == row['participant_id']) & (df['path_num'] == row['path_num']) & (df['start_idx'] >= row['start_idx']) & (df['start_idx'] <= row['end_idx'])]) > 1 or \
len(df[(df['participant_id'] == row['participant_id']) & (df['path_num'] == row['path_num']) & (df['end_idx'] >= row['start_idx']) & (df['end_idx'] <= row['end_idx'])]) > 1:
df.at[index, 'overlapping'] = True
overlapping_and_not_subset = df[df['overlapping'] == True]
oans = overlapping_and_not_subset
corr_oans = oans[relevant_features].corr(method='pearson',numeric_only=True)
mask = np.abs(corr_oans) < 0.3
plt.figure(figsize=(12, 10))
sns.heatmap(corr_oans, annot=True, fmt=".2f", mask=mask)
plt.title(f"Metrics Correlation Matrix - Overlapping Data (n={len(oans)})")
plt.show()
In [ ]:
threshold = 0.3
filtered_df = df[(df['walking_direction_lagged_corr'] > threshold) & (df['speeds_lagged_corr'] > threshold)]
filtered_oans = filtered_df[filtered_df['overlapping'] == True]
corr_filtered_oans = filtered_oans[relevant_features].corr(method='pearson',numeric_only=True)
mask = (np.abs(corr_filtered_oans) < 0.3)
plt.figure(figsize=(12, 10))
sns.heatmap(corr_filtered_oans, annot=True, fmt=".2f", mask=mask)
plt.title(f"Metrics Correlation Matrix - Filtered Overlapping Data (n={len(filtered_oans)})")
plt.show()
In [ ]:
filtered_oans.describe().T
Out[Â ]:
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| participant_id | 96.0 | 1822.364583 | 636.073748 | 407.000000 | 2101.750000 | 2104.000000 | 2107.000000 | 2111.000000 |
| path_num | 96.0 | 2.218750 | 0.728418 | 1.000000 | 2.000000 | 2.000000 | 3.000000 | 3.000000 |
| turn_num | 96.0 | 2.375000 | 1.649561 | 1.000000 | 1.000000 | 2.000000 | 3.000000 | 8.000000 |
| start_idx | 96.0 | 473.281250 | 387.076266 | 54.000000 | 223.500000 | 394.500000 | 579.250000 | 2129.000000 |
| end_idx | 96.0 | 529.791667 | 384.969470 | 108.000000 | 295.500000 | 445.500000 | 638.500000 | 2179.000000 |
| walking_direction_lag | 96.0 | -6.208333 | 22.130197 | -79.000000 | -17.000000 | -2.500000 | 5.250000 | 47.000000 |
| walking_direction_base_corr | 96.0 | 0.044735 | 0.433912 | -0.857578 | -0.342859 | 0.079463 | 0.436595 | 0.852908 |
| walking_direction_lagged_corr | 96.0 | 0.489776 | 0.142875 | 0.307794 | 0.375805 | 0.447162 | 0.588371 | 0.955078 |
| walking_direction_dtw | 96.0 | 48.883543 | 27.746676 | 4.239983 | 27.611524 | 45.737083 | 57.678972 | 162.955230 |
| speeds_lag | 96.0 | -3.989583 | 11.887627 | -48.000000 | -9.000000 | -1.500000 | 0.250000 | 23.000000 |
| speeds_base_corr | 96.0 | 0.238395 | 0.352438 | -0.797566 | 0.048711 | 0.302736 | 0.488232 | 0.839497 |
| speeds_lagged_corr | 96.0 | 0.513716 | 0.138482 | 0.301195 | 0.395598 | 0.490557 | 0.615078 | 0.839497 |
| speeds_dtw | 96.0 | 36.646493 | 17.578670 | 10.673369 | 23.200637 | 32.107329 | 47.021830 | 90.532321 |
| mean_distance | 96.0 | 2.315269 | 1.362139 | 0.433665 | 1.364710 | 1.992593 | 3.016074 | 9.579321 |
| mean_speed_difference | 96.0 | 0.356451 | 0.132146 | 0.094192 | 0.266317 | 0.336095 | 0.429151 | 0.889566 |
| mean_walking_direction_difference | 96.0 | 61.315420 | 18.263149 | 14.479058 | 50.026540 | 62.640650 | 74.918968 | 98.622592 |
| mean_pace_asymmetry | 96.0 | 0.428294 | 0.099920 | 0.122233 | 0.365532 | 0.425443 | 0.476916 | 0.734916 |
| turn_duration | 96.0 | 11.302083 | 5.717342 | 5.000000 | 6.950000 | 10.100000 | 13.900000 | 34.200000 |
| normalized_walking_direction_dtw | 96.0 | 0.897978 | 0.356410 | 0.146206 | 0.644327 | 0.834471 | 1.071630 | 1.960544 |
| normalized_speeds_dtw | 96.0 | 0.675115 | 0.177642 | 0.256842 | 0.558886 | 0.670667 | 0.793171 | 1.377559 |
| abs_walking_direction_lag | 96.0 | 16.291667 | 16.139469 | 0.000000 | 3.750000 | 12.500000 | 23.250000 | 79.000000 |
| abs_speeds_lag | 96.0 | 8.135417 | 9.514111 | 0.000000 | 1.000000 | 5.000000 | 12.000000 | 48.000000 |
In [ ]:
from scipy.stats import pearsonr
for feature in relevant_features:
to_display = []
for feature2 in relevant_features:
if not feature.startswith(feature2) and not feature2.startswith(feature) and not feature.endswith(feature2) and not feature2.endswith(feature)\
and np.abs(corr_filtered_oans.loc[feature, feature2]) > 0.3:
to_display.append(feature2)
if len(to_display) == 0:
continue
# set plot grid of 1xlen(to_display)
fig, axs = plt.subplots(int(np.ceil(len(to_display)/3)), min(len(to_display),3), figsize=(5*min(len(to_display),3),5*int(np.ceil(len(to_display)/3))))
# print(axs.shape)
axs = np.atleast_2d(axs) # Ensure axs is always an array, even if it's a single subplot
for i, feature2 in enumerate(to_display):
peares = pearsonr(filtered_oans[feature], filtered_oans[feature2], alternative='two-sided')
pcorr, p_val = peares.statistic, peares.pvalue
CI = peares.confidence_interval(confidence_level=0.95)
# Scatter plot
sns.scatterplot(x=feature, y=feature2, data=filtered_oans, ax=axs[int(np.ceil(i/3))-1,i%3])
# Regression line
sns.regplot(x=feature, y=feature2, data=filtered_oans, scatter=False, line_kws={'color': 'red'}, ax=axs[int(np.ceil(i/3))-1,i%3])
axs[int(np.ceil(i/3))-1,i%3].set_title(f"compared with {feature2}\ncorr: {round(corr_filtered_oans.loc[feature, feature2], 3)}, p_val: {round(p_val,5)}, CI: {[round(c,3) for c in CI]}", fontweight='bold')
# add title "feature vs correlated features" to the plot
fig.suptitle(f"{feature}'s correlations", fontweight='bold')
plt.tight_layout()
plt.show()
In [ ]:
from PIL import Image
import seaborn as sns
from scipy import stats
# for each feature, find highest and lowest valued row and display them
for feature in relevant_features:
# print(f"Feature: {feature}")
# print("Highest values:")
h_res = filtered_oans.loc[filtered_oans[feature].nlargest(1).index, ['participant_id', 'person_robot', 'path_num', 'turn_num', feature]]
# print(h_res)
h_base_path = f"./turns/{h_res['participant_id'].values[0]}/{h_res['person_robot'].values[0]}/run_{h_res['path_num'].values[0]}/turn_{h_res['turn_num'].values[0]}/"
fig, axs = plt.subplots(1, 4, figsize=(20, 5))
paths_img = Image.open(h_base_path + "paths.png")
axs[0].imshow(paths_img)
axs[0].axis('off')
distance_img = Image.open(h_base_path + "distance.png")
axs[1].imshow(distance_img)
axs[1].axis('off')
walking_directions_img = Image.open(h_base_path + "walking_directions.png")
axs[2].imshow(walking_directions_img)
axs[2].axis('off')
speeds_img = Image.open(h_base_path + "speeds.png")
axs[3].imshow(speeds_img)
axs[3].axis('off')
to_print_dict = {k: round(v_val, 3) if isinstance(v_val, float) else v_val for k,v in h_res.to_dict().items() for v_key, v_val in v.items()}
to_print_str = ", ".join([f"{k}: {v}" for k,v in to_print_dict.items()])
fig.suptitle(f"{feature} - highest value\n {to_print_str}", fontweight='bold')
plt.tight_layout()
plt.show()
# print("Lowest values:")
l_res = filtered_oans.loc[filtered_oans[feature].nsmallest(1).index, ['participant_id', 'person_robot', 'path_num', 'turn_num', feature]]
# print(l_res)
l_base_path = f"./turns/{l_res['participant_id'].values[0]}/{l_res['person_robot'].values[0]}/run_{l_res['path_num'].values[0]}/turn_{l_res['turn_num'].values[0]}/"
fig, axs = plt.subplots(1, 4, figsize=(20, 5))
paths_img = Image.open(l_base_path + "paths.png")
axs[0].imshow(paths_img)
axs[0].axis('off')
distance_img = Image.open(l_base_path + "distance.png")
axs[1].imshow(distance_img)
axs[1].axis('off')
walking_directions_img = Image.open(l_base_path + "walking_directions.png")
axs[2].imshow(walking_directions_img)
axs[2].axis('off')
speeds_img = Image.open(l_base_path + "speeds.png")
axs[3].imshow(speeds_img)
axs[3].axis('off')
to_print_dict = {k: round(v_val, 3) if isinstance(v_val, float) else v_val for k,v in l_res.to_dict().items() for v_key, v_val in v.items()}
to_print_str = ", ".join([f"{k}: {v}" for k,v in to_print_dict.items()])
fig.suptitle(f"{feature} - lowest value\n {to_print_str}", fontweight='bold')
plt.tight_layout()
plt.show()
print("\n\n")
In [ ]:
fig, axs = plt.subplots(int(np.ceil(len(relevant_features)/3)), min(len(relevant_features),3), figsize=(15, 5*int(np.ceil(len(relevant_features)/3))))
axs = np.atleast_2d(axs) # Ensure axs is always an array, even if it's a single subplot
for i, feature in enumerate(relevant_features):
sns.histplot(data=filtered_oans, x=feature, kde=True, ax=axs[int(np.ceil(i/3))-1,i%3])
axs[int(np.ceil(i/3))-1,i%3].set_title(feature)
axs[int(np.ceil(i/3))-1,i%3].set_xlabel('')
axs[int(np.ceil(i/3))-1,i%3].set_ylabel('')
plt.tight_layout()
plt.show()
In [ ]:
import scipy.stats as stats
fig, axs = plt.subplots(int(np.ceil(len(relevant_features)/3)), min(len(relevant_features),3), figsize=(15, 5*int(np.ceil(len(relevant_features)/3))))
axs = np.atleast_2d(axs) # Ensure axs is always an array, even if it's a single subplot
for i, feature in enumerate(relevant_features):
stats.probplot(filtered_oans[feature], dist="norm", plot=axs[int(np.ceil(i/3))-1,i%3])
axs[int(np.ceil(i/3))-1,i%3].set_title(feature)
axs[int(np.ceil(i/3))-1,i%3].set_xlabel('Theoretical Quantiles')
axs[int(np.ceil(i/3))-1,i%3].set_ylabel('Ordered Values')
plt.tight_layout()
plt.show()
In [ ]:
fig, axs = plt.subplots(int(np.ceil(len(relevant_features)/3)), min(len(relevant_features),3), figsize=(15, 5*int(np.ceil(len(relevant_features)/3))))
axs = np.atleast_2d(axs) # Ensure axs is always an array, even if it's a single subplot
for i, feature in enumerate(relevant_features):
sns.boxplot(data=filtered_oans, y=feature, ax=axs[int(np.ceil(i/3))-1,i%3])
axs[int(np.ceil(i/3))-1,i%3].set_title(feature)
axs[int(np.ceil(i/3))-1,i%3].set_xlabel('')
axs[int(np.ceil(i/3))-1,i%3].set_ylabel('')
plt.tight_layout()
plt.show()
MISC¶
In [ ]:
stop codon
Cell In[224], line 1 stop codon ^ SyntaxError: invalid syntax
In [ ]:
import numpy as np
# plot walking_direction_lag and walking_direction_dtw
plt.scatter(abs(filtered_oans['walking_direction_lag']), filtered_oans['walking_direction_dtw'])
plt.xlabel('walking_direction_lag')
plt.ylabel('walking_direction_dtw')
plt.title('Absolute Walking Direction Lag vs DTW')
# fit regression line
x = abs(filtered_oans['walking_direction_lag'])
y = filtered_oans['walking_direction_dtw']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')
plt.show()
# plot speeds_lag and speeds_dtw
plt.scatter(abs(filtered_oans['speeds_lag']), filtered_oans['speeds_dtw'])
plt.xlabel('speeds_lag')
plt.ylabel('speeds_dtw')
plt.title('Absolute Speeds Lag vs DTW')
# fit regression line
x = abs(filtered_oans['speeds_lag'])
y = filtered_oans['speeds_dtw']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')
plt.show()
In [ ]:
# plot walking_direction_lag vs all other relevant features
for feature in relevant_features:
if feature == 'walking_direction_lag':
continue
plt.scatter(filtered_oans['walking_direction_lag'], filtered_oans[feature])
plt.xlabel('walking_direction_lag')
plt.ylabel(feature)
plt.title(f'Walking Direction Lag vs {feature}')
plt.show()
In [ ]:
# plot speeds_lag vs all other relevant features
for feature in relevant_features:
if feature == 'speeds_lag':
continue
plt.scatter(filtered_oans['speeds_lag'], filtered_oans[feature])
plt.xlabel('speeds_lag')
plt.ylabel(feature)
plt.title(f'Speeds Lag vs {feature}')
plt.show()
In [ ]:
# filter out outliers
filtered_oans = filtered_oans[(filtered_oans['walking_direction_lag'] < 20) & (filtered_oans['walking_direction_lag'] > -20) & (filtered_oans['speeds_lag'] < 20) & (filtered_oans['speeds_lag'] > -20)]
# plot walking direction lag vs speeds lag
plt.scatter(filtered_oans['walking_direction_lag'], filtered_oans['speeds_lag'])
plt.xlabel('walking_direction_lag')
plt.ylabel('speeds_lag')
plt.title('Walking Direction Lag vs Speeds Lag')
# plt.xlim(-20, 20)
# plt.ylim(-20, 20)
# plot regression line
x = filtered_oans['walking_direction_lag']
y = filtered_oans['speeds_lag']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')
plt.show()
In [ ]:
# plot walking direction dtw vs speeds dtw
plt.scatter(filtered_oans['walking_direction_dtw'], filtered_oans['speeds_dtw'])
plt.xlabel('walking_direction_dtw')
plt.ylabel('speeds_dtw')
plt.title('Walking Direction DTW vs Speeds DTW')
# fit regression line
x = filtered_oans['walking_direction_dtw']
y = filtered_oans['speeds_dtw']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')
plt.show()
In [ ]:
df[df['walking_direction_lagged_corr'] > df['walking_direction_base_corr']].sort_values(by='walking_direction_lag', ascending=False)
Out[Â ]:
| participant_id | path_num | person_robot | turn_num | start_idx | end_idx | walking_direction_lag | walking_direction_base_corr | walking_direction_lagged_corr | walking_direction_dtw | ... | speeds_dtw | mean_distance | mean_speed_difference | mean_walking_direction_difference | turn_duration | normalized_walking_direction_dtw | normalized_speeds_dtw | abs_walking_direction_lag | abs_speeds_lag | overlapping | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 202 | 2111 | 2 | person | 2 | 507 | 604 | 47 | -0.424161 | 0.337934 | 103.482527 | ... | 65.295422 | 1.392034 | 0.250340 | 57.612406 | 19.4 | 1.066830 | 0.673149 | 47 | 6 | True |
| 80 | 2103 | 2 | robot | 2 | 578 | 634 | 43 | -0.249106 | 0.616893 | 61.422126 | ... | 25.631941 | 2.989126 | 0.338179 | 47.827222 | 11.2 | 1.096824 | 0.457713 | 43 | 3 | True |
| 181 | 2108 | 3 | robot | 3 | 528 | 592 | 42 | 0.030613 | 0.248076 | 73.712595 | ... | 47.000078 | 2.071863 | 0.453484 | 70.369095 | 12.8 | 1.151759 | 0.734376 | 42 | 22 | True |
| 140 | 2106 | 3 | robot | 9 | 1990 | 2049 | 37 | -0.272773 | 0.455208 | 51.252940 | ... | 61.127953 | 2.010555 | 0.531695 | 60.823225 | 11.8 | 0.868694 | 1.036067 | 37 | 39 | False |
| 175 | 2108 | 2 | robot | 5 | 708 | 760 | 33 | -0.593129 | 0.397421 | 87.662114 | ... | 54.166806 | 2.526451 | 0.815135 | 67.273508 | 10.4 | 1.685810 | 1.041669 | 33 | 21 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 118 | 2106 | 1 | robot | 1 | 395 | 479 | -41 | -0.549742 | 0.596739 | 101.277338 | ... | 36.920811 | 2.505176 | 0.363976 | 60.501683 | 16.8 | 1.205683 | 0.439533 | 41 | 11 | True |
| 25 | 408 | 2 | robot | 1 | 394 | 450 | -47 | 0.427449 | 0.433015 | 53.091645 | ... | 35.825730 | 3.623730 | 0.178786 | 50.134179 | 11.2 | 0.948065 | 0.639745 | 47 | 0 | True |
| 197 | 2109 | 3 | robot | 3 | 244 | 333 | -51 | -0.364310 | 0.376058 | 79.085565 | ... | 62.508268 | 2.410478 | 0.621798 | 86.824998 | 17.8 | 0.888602 | 0.702340 | 51 | 18 | True |
| 192 | 2109 | 2 | robot | 4 | 431 | 500 | -59 | 0.225661 | 0.297712 | 65.438534 | ... | 32.955370 | 1.050741 | 0.305188 | 92.699869 | 13.8 | 0.948385 | 0.477614 | 59 | 3 | True |
| 70 | 2102 | 3 | robot | 5 | 1145 | 1237 | -79 | -0.421383 | 0.470861 | 125.077491 | ... | 43.254521 | 1.194304 | 0.334011 | 70.750520 | 18.4 | 1.359538 | 0.470158 | 79 | 7 | True |
176 rows × 23 columns
In [ ]:
print(df.dtypes)
participant_id int64 path_num int64 person_robot object turn_num int64 start_idx int64 end_idx int64 walking_direction_lag int64 walking_direction_base_corr float64 walking_direction_lagged_corr float64 walking_direction_dtw float64 speeds_lag int64 speeds_base_corr float64 speeds_lagged_corr float64 speeds_dtw float64 mean_distance float64 mean_speed_difference float64 mean_walking_direction_difference float64 turn_duration float64 normalized_walking_direction_dtw float64 normalized_speeds_dtw float64 abs_walking_direction_lag int64 abs_speeds_lag int64 overlapping bool dtype: object
In [ ]:
oans.corr
Out[Â ]:
<bound method DataFrame.corr of participant_id path_num person_robot turn_num start_idx end_idx \
1 407 1 robot 1 232 300
2 407 2 person 1 80 108
4 407 2 person 3 400 431
5 407 2 person 4 649 686
6 407 2 robot 1 82 141
.. ... ... ... ... ... ...
197 2109 3 robot 3 244 333
198 2111 1 person 1 314 373
200 2111 1 robot 2 307 363
202 2111 2 person 2 507 604
208 2111 3 robot 1 87 167
walking_direction_lag walking_direction_base_corr \
1 0 0.732906
2 -14 -0.857578
4 0 0.679583
5 -2 0.506926
6 -18 -0.490586
.. ... ...
197 -51 -0.364310
198 -2 0.578583
200 -12 0.503671
202 47 -0.424161
208 -9 -0.073150
walking_direction_lagged_corr walking_direction_dtw ... speeds_dtw \
1 0.732906 34.565064 ... 38.493228
2 0.444096 54.895241 ... 20.858506
4 0.679583 20.961050 ... 10.673369
5 0.517505 27.650007 ... 37.784558
6 0.317564 83.819802 ... 48.835245
.. ... ... ... ...
197 0.376058 79.085565 ... 62.508268
198 0.615375 24.827749 ... 28.068922
200 0.641915 27.348872 ... 34.160874
202 0.337934 103.482527 ... 65.295422
208 0.444889 53.814038 ... 49.325597
mean_distance mean_speed_difference mean_walking_direction_difference \
1 1.844983 0.318515 60.896701
2 2.682582 0.469750 43.091946
4 1.857478 0.301516 37.465756
5 1.766503 0.331568 65.157930
6 3.421456 0.687661 55.353376
.. ... ... ...
197 2.410478 0.621798 86.824998
198 1.043703 0.249136 68.121790
200 1.039405 0.295089 71.104648
202 1.392034 0.250340 57.612406
208 1.376935 0.280625 44.422179
turn_duration normalized_walking_direction_dtw normalized_speeds_dtw \
1 13.6 0.508310 0.566077
2 5.6 1.960544 0.744947
4 6.2 0.676163 0.344302
5 7.4 0.747297 1.021204
6 11.8 1.420675 0.827716
.. ... ... ...
197 17.8 0.888602 0.702340
198 11.8 0.420809 0.475744
200 11.2 0.488373 0.610016
202 19.4 1.066830 0.673149
208 16.0 0.672675 0.616570
abs_walking_direction_lag abs_speeds_lag overlapping
1 0 5 True
2 14 1 True
4 0 4 True
5 2 10 True
6 18 28 True
.. ... ... ...
197 51 18 True
198 2 0 True
200 12 0 True
202 47 6 True
208 9 4 True
[109 rows x 23 columns]>
In [ ]:
count = len(df[df['walking_direction_lagged_corr'] > 0.5])
print(count)
79
In [ ]:
count = len(df[df['speeds_lagged_corr'] > 0.5])
print(count)
80
In [ ]:
df[df['walking_direction_lagged_corr'] == df['walking_direction_base_corr']].sort_values(by='walking_direction_lag', ascending=False).head(15)
Out[Â ]:
| participant_id | path_num | person_robot | turn_num | start_idx | end_idx | walking_direction_lag | walking_direction_base_corr | walking_direction_lagged_corr | walking_direction_dtw | ... | speeds_dtw | mean_distance | mean_speed_difference | mean_walking_direction_difference | turn_duration | normalized_walking_direction_dtw | normalized_speeds_dtw | abs_walking_direction_lag | abs_speeds_lag | overlapping | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 407 | 1 | person | 1 | 235 | 261 | 0 | 0.806176 | 0.806176 | 8.140371 | ... | 21.516657 | 2.706603 | 0.250385 | 73.832481 | 5.2 | 0.313091 | 0.827564 | 0 | 23 | False |
| 1 | 407 | 1 | robot | 1 | 232 | 300 | 0 | 0.732906 | 0.732906 | 34.565064 | ... | 38.493228 | 1.844983 | 0.318515 | 60.896701 | 13.6 | 0.508310 | 0.566077 | 0 | 5 | True |
| 206 | 2111 | 3 | person | 2 | 219 | 244 | 0 | 0.941918 | 0.941918 | 6.809205 | ... | 19.813179 | 0.502552 | 0.358440 | 82.283013 | 5.0 | 0.272368 | 0.792527 | 0 | 2 | False |
| 188 | 2109 | 2 | person | 5 | 427 | 483 | 0 | 0.526391 | 0.526391 | 33.637688 | ... | 28.478260 | 0.636772 | 0.273464 | 92.401208 | 11.2 | 0.600673 | 0.508540 | 0 | 1 | True |
| 171 | 2108 | 2 | robot | 1 | 269 | 331 | 0 | 0.325454 | 0.325454 | 59.084370 | ... | 62.017733 | 2.211453 | 0.915065 | 64.867664 | 12.4 | 0.952974 | 1.000286 | 0 | 22 | False |
| 165 | 2108 | 1 | robot | 2 | 436 | 464 | 0 | 0.863591 | 0.863591 | 12.509304 | ... | 28.906489 | 3.805783 | 0.221571 | 68.057128 | 5.6 | 0.446761 | 1.032375 | 0 | 3 | False |
| 148 | 2107 | 2 | person | 5 | 757 | 807 | 0 | 0.378086 | 0.378086 | 29.704120 | ... | 38.924311 | 3.368980 | 0.273900 | 53.103047 | 10.0 | 0.594082 | 0.778486 | 0 | 3 | False |
| 144 | 2107 | 2 | person | 1 | 333 | 362 | 0 | 0.716510 | 0.716510 | 14.198641 | ... | 13.912627 | 1.472380 | 0.263517 | 39.911269 | 5.8 | 0.489608 | 0.479746 | 0 | 0 | True |
| 102 | 2105 | 2 | person | 1 | 378 | 407 | 0 | 0.459780 | 0.459780 | 23.294339 | ... | 24.668994 | 2.047301 | 0.234966 | 55.996426 | 5.8 | 0.803253 | 0.850655 | 0 | 23 | False |
| 82 | 2103 | 2 | robot | 4 | 1064 | 1128 | 0 | 0.650376 | 0.650376 | 36.539870 | ... | 36.712230 | 0.871589 | 0.339594 | 71.137158 | 12.8 | 0.570935 | 0.573629 | 0 | 29 | True |
| 68 | 2102 | 3 | robot | 3 | 612 | 643 | 0 | 0.417467 | 0.417467 | 28.562510 | ... | 29.803919 | 2.785083 | 0.598161 | 88.469059 | 6.2 | 0.921371 | 0.961417 | 0 | 13 | True |
| 42 | 2101 | 2 | person | 3 | 1064 | 1093 | 0 | 0.781565 | 0.781565 | 15.187169 | ... | 10.358754 | 2.684810 | 0.164138 | 45.262136 | 5.8 | 0.523695 | 0.357198 | 0 | 0 | False |
| 33 | 2101 | 1 | person | 1 | 61 | 86 | 0 | 0.293240 | 0.293240 | 26.936113 | ... | 23.890237 | 0.902699 | 0.218320 | 24.226264 | 5.0 | 1.077445 | 0.955609 | 0 | 5 | False |
| 21 | 408 | 2 | person | 1 | 389 | 433 | 0 | 0.786175 | 0.786175 | 17.890734 | ... | 24.423989 | 3.531791 | 0.182818 | 52.813454 | 8.8 | 0.406608 | 0.555091 | 0 | 1 | True |
| 15 | 407 | 3 | robot | 2 | 250 | 312 | 0 | 0.621034 | 0.621034 | 45.833620 | ... | 50.858723 | 2.147284 | 0.455274 | 37.557363 | 12.4 | 0.739252 | 0.820302 | 0 | 1 | True |
15 rows × 23 columns
In [ ]:
df.where(df['speeds_lagged_corr'] > 0.5).sort_values(by='speeds_lagged_corr', ascending=False).dropna().head(15)
Out[Â ]:
| participant_id | path_num | person_robot | turn_num | start_idx | end_idx | walking_direction_lag | walking_direction_base_corr | walking_direction_lagged_corr | walking_direction_dtw | ... | speeds_dtw | mean_distance | mean_speed_difference | mean_walking_direction_difference | turn_duration | normalized_walking_direction_dtw | normalized_speeds_dtw | abs_walking_direction_lag | abs_speeds_lag | overlapping | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 42 | 2101.0 | 2.0 | person | 3.0 | 1064.0 | 1093.0 | 0.0 | 0.781565 | 0.781565 | 15.187169 | ... | 10.358754 | 2.684810 | 0.164138 | 45.262136 | 5.8 | 0.523695 | 0.357198 | 0.0 | 0.0 | False |
| 104 | 2105.0 | 2.0 | person | 3.0 | 749.0 | 816.0 | -23.0 | 0.013932 | 0.509513 | 42.612579 | ... | 21.281903 | 0.474692 | 0.124701 | 56.562109 | 13.4 | 0.636009 | 0.317640 | 23.0 | 0.0 | False |
| 189 | 2109.0 | 2.0 | robot | 1.0 | 70.0 | 120.0 | -17.0 | -0.057183 | 0.673054 | 47.122442 | ... | 20.004949 | 1.724681 | 0.230480 | 53.237857 | 10.0 | 0.942449 | 0.400099 | 17.0 | 0.0 | True |
| 184 | 2109.0 | 2.0 | person | 1.0 | 72.0 | 110.0 | -17.0 | -0.326919 | 0.505911 | 46.229325 | ... | 15.671327 | 1.816075 | 0.247447 | 60.916486 | 7.6 | 1.216561 | 0.412403 | 17.0 | 0.0 | False |
| 120 | 2106.0 | 2.0 | person | 2.0 | 457.0 | 482.0 | -9.0 | 0.215447 | 0.722022 | 19.120818 | ... | 11.133530 | 2.420559 | 0.479240 | 26.041330 | 5.0 | 0.764833 | 0.445341 | 9.0 | 2.0 | False |
| 147 | 2107.0 | 2.0 | person | 4.0 | 520.0 | 549.0 | -10.0 | 0.154518 | 0.524378 | 21.969941 | ... | 17.870706 | 1.796626 | 0.384621 | 32.503747 | 5.8 | 0.757584 | 0.616231 | 10.0 | 9.0 | True |
| 97 | 2104.0 | 2.0 | robot | 1.0 | 60.0 | 110.0 | 21.0 | -0.320415 | 0.334304 | 47.723231 | ... | 27.773751 | 0.757656 | 0.151960 | 38.997632 | 10.0 | 0.954465 | 0.555475 | 21.0 | 0.0 | True |
| 95 | 2104.0 | 1.0 | robot | 1.0 | 776.0 | 802.0 | 2.0 | 0.810848 | 0.838740 | 7.370475 | ... | 11.771324 | 1.284899 | 0.354692 | 38.198527 | 5.2 | 0.283480 | 0.452743 | 2.0 | 2.0 | False |
| 144 | 2107.0 | 2.0 | person | 1.0 | 333.0 | 362.0 | 0.0 | 0.716510 | 0.716510 | 14.198641 | ... | 13.912627 | 1.472380 | 0.263517 | 39.911269 | 5.8 | 0.489608 | 0.479746 | 0.0 | 0.0 | True |
| 13 | 407.0 | 3.0 | person | 3.0 | 317.0 | 342.0 | 13.0 | -0.652219 | 0.426496 | 36.536157 | ... | 10.451860 | 1.143140 | 0.202000 | 44.318782 | 5.0 | 1.461446 | 0.418074 | 13.0 | 0.0 | False |
| 28 | 408.0 | 3.0 | person | 2.0 | 162.0 | 191.0 | -25.0 | -0.339538 | 0.299112 | 36.922039 | ... | 15.447943 | 3.337911 | 0.163138 | 78.229208 | 5.8 | 1.273174 | 0.532688 | 25.0 | 1.0 | True |
| 56 | 2102.0 | 2.0 | person | 2.0 | 276.0 | 302.0 | 1.0 | 0.296399 | 0.358437 | 27.424802 | ... | 15.150204 | 1.579503 | 0.211423 | 23.269520 | 5.2 | 1.054800 | 0.582700 | 1.0 | 0.0 | True |
| 191 | 2109.0 | 2.0 | robot | 3.0 | 302.0 | 367.0 | -9.0 | 0.222554 | 0.519508 | 48.858096 | ... | 22.949901 | 1.654095 | 0.252631 | 69.199313 | 13.0 | 0.751663 | 0.353075 | 9.0 | 4.0 | True |
| 187 | 2109.0 | 2.0 | person | 4.0 | 291.0 | 362.0 | -9.0 | 0.210021 | 0.467147 | 53.219390 | ... | 18.235794 | 1.805430 | 0.227338 | 68.023336 | 14.2 | 0.749569 | 0.256842 | 9.0 | 2.0 | True |
| 40 | 2101.0 | 2.0 | person | 1.0 | 490.0 | 527.0 | -26.0 | -0.304902 | 0.411042 | 57.569335 | ... | 21.816757 | 2.015769 | 0.244919 | 53.535738 | 7.4 | 1.555928 | 0.589642 | 26.0 | 6.0 | True |
15 rows × 23 columns
In [ ]:
corr[corr > 0.3]
Out[Â ]:
| turn_duration | mean_distance | walking_direction_lag | abs_walking_direction_lag | walking_direction_dtw | normalized_walking_direction_dtw | walking_direction_lagged_corr | speeds_lag | abs_speeds_lag | speeds_dtw | normalized_speeds_dtw | speeds_lagged_corr | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| turn_duration | 1.000000 | NaN | NaN | 0.486517 | 0.727171 | NaN | NaN | NaN | NaN | 0.761391 | NaN | NaN |
| mean_distance | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| walking_direction_lag | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| abs_walking_direction_lag | 0.486517 | NaN | NaN | 1.000000 | 0.753330 | 0.439396 | NaN | NaN | NaN | 0.335844 | NaN | NaN |
| walking_direction_dtw | 0.727171 | NaN | NaN | 0.753330 | 1.000000 | 0.512962 | NaN | NaN | NaN | 0.548151 | NaN | NaN |
| normalized_walking_direction_dtw | NaN | NaN | NaN | 0.439396 | 0.512962 | 1.000000 | NaN | NaN | NaN | NaN | NaN | NaN |
| walking_direction_lagged_corr | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN |
| speeds_lag | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN |
| abs_speeds_lag | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.544645 | 0.447434 | NaN |
| speeds_dtw | 0.761391 | NaN | NaN | 0.335844 | 0.548151 | NaN | NaN | NaN | 0.544645 | 1.000000 | 0.327886 | NaN |
| normalized_speeds_dtw | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.447434 | 0.327886 | 1.000000 | NaN |
| speeds_lagged_corr | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 |
In [ ]:
rel.describe()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) /Users/yoav.sc/Desktop/GPS Data Analysis/analyze_turns_data.ipynb Cell 27 line 1 ----> <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X35sZmlsZQ%3D%3D?line=0'>1</a> rel.describe() NameError: name 'rel' is not defined
In [ ]:
for feature in relevant_features:
sns.histplot(data=rel, x=feature, kde=True)
plt.show()
# sns.boxplot(data=rel, x=feature)
# plt.show()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) /Users/yoav.sc/Desktop/GPS Data Analysis/analyze_turns_data.ipynb Cell 28 line 2 <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=0'>1</a> for feature in relevant_features: ----> <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=1'>2</a> sns.histplot(data=rel, x=feature, kde=True) <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=2'>3</a> plt.show() <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=3'>4</a> # sns.boxplot(data=rel, x=feature) <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=4'>5</a> # plt.show() NameError: name 'rel' is not defined
In [ ]:
# display paths of turns with highest and lowest correlation
# highest correlation
highest_corr = rel['speeds_lagged_corr'].idxmax()
print("Highest correlation (Speed):")
print(df.iloc[highest_corr])
highest_corr = rel['walking_direction_lagged_corr'].idxmax()
print("Highest correlation (Walking direction):")
print(df.iloc[highest_corr])
# lowest correlation
lowest_corr = rel['speeds_lagged_corr'].idxmin()
print("Lowest correlation (Speed):")
print(df.iloc[lowest_corr])
lowest_corr = rel['walking_direction_lagged_corr'].idxmin()
print("Lowest correlation (Walking direction):")
print(df.iloc[lowest_corr])
Highest correlation (Speed): participant_id 2101 path_num 3 person_robot person turn_num 1 start_idx 358 end_idx 384 walking_direction_lag 1 walking_direction_base_corr 0.577738 walking_direction_lagged_corr 0.672207 walking_direction_dtw 20.737426 speeds_lag -16 speeds_base_corr -0.363852 speeds_lagged_corr 0.470437 speeds_dtw 25.559312 mean_distance 4.330081 mean_speed_difference 0.436231 mean_walking_direction_difference 27.799034 turn_duration 5.2 normalized_walking_direction_dtw 3.987967 normalized_speeds_dtw 4.915252 abs_walking_direction_lag 1 abs_speeds_lag 16 overlapping True Name: 45, dtype: object Highest correlation (Walking direction): participant_id 2105 path_num 2 person_robot robot turn_num 4 start_idx 1207 end_idx 1255 walking_direction_lag 4 walking_direction_base_corr 0.485074 walking_direction_lagged_corr 0.508153 walking_direction_dtw 39.062309 speeds_lag -23 speeds_base_corr -0.071304 speeds_lagged_corr 0.347287 speeds_dtw 36.752992 mean_distance 2.585128 mean_speed_difference 0.319521 mean_walking_direction_difference 46.112376 turn_duration 9.6 normalized_walking_direction_dtw 4.068991 normalized_speeds_dtw 3.828437 abs_walking_direction_lag 4 abs_speeds_lag 23 overlapping False Name: 110, dtype: object Lowest correlation (Speed): participant_id 2102 path_num 1 person_robot robot turn_num 2 start_idx 287 end_idx 319 walking_direction_lag 7 walking_direction_base_corr -0.324347 walking_direction_lagged_corr 0.401246 walking_direction_dtw 25.914037 speeds_lag 1 speeds_base_corr 0.435508 speeds_lagged_corr 0.460162 speeds_dtw 32.683168 mean_distance 5.296994 mean_speed_difference 0.820219 mean_walking_direction_difference 41.311229 turn_duration 6.4 normalized_walking_direction_dtw 4.049068 normalized_speeds_dtw 5.106745 abs_walking_direction_lag 7 abs_speeds_lag 1 overlapping False Name: 53, dtype: object Lowest correlation (Walking direction): participant_id 2101 path_num 1 person_robot robot turn_num 1 start_idx 583 end_idx 650 walking_direction_lag 8 walking_direction_base_corr 0.414258 walking_direction_lagged_corr 0.510496 walking_direction_dtw 31.8583 speeds_lag 4 speeds_base_corr 0.329163 speeds_lagged_corr 0.384854 speeds_dtw 40.768658 mean_distance 4.009144 mean_speed_difference 0.220985 mean_walking_direction_difference 69.468379 turn_duration 13.4 normalized_walking_direction_dtw 2.377485 normalized_speeds_dtw 3.042437 abs_walking_direction_lag 8 abs_speeds_lag 4 overlapping True Name: 38, dtype: object
In [ ]:
threshold = 0.5
filtered_df = df[(df['walking_direction_lagged_corr'] > threshold) & (df['speeds_lagged_corr'] > threshold)]
In [ ]:
len(filtered_df)
Out[Â ]:
39
In [ ]:
filtered_df.describe().T
Out[Â ]:
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| participant_id | 39.0 | 1757.384615 | 694.839119 | 407.000000 | 2101.500000 | 2105.000000 | 2106.500000 | 2111.000000 |
| path_num | 39.0 | 2.205128 | 0.656124 | 1.000000 | 2.000000 | 2.000000 | 3.000000 | 3.000000 |
| turn_num | 39.0 | 2.538462 | 1.958062 | 1.000000 | 1.000000 | 2.000000 | 3.000000 | 9.000000 |
| start_idx | 39.0 | 543.923077 | 496.897182 | 70.000000 | 238.500000 | 400.000000 | 661.000000 | 2199.000000 |
| end_idx | 39.0 | 582.615385 | 496.691984 | 110.000000 | 277.500000 | 431.000000 | 689.000000 | 2225.000000 |
| walking_direction_lag | 39.0 | -0.897436 | 10.651896 | -23.000000 | -5.000000 | 0.000000 | 1.500000 | 43.000000 |
| walking_direction_base_corr | 39.0 | 0.377439 | 0.350161 | -0.497956 | 0.155233 | 0.450423 | 0.636021 | 0.866582 |
| walking_direction_lagged_corr | 39.0 | 0.639285 | 0.123473 | 0.505911 | 0.524876 | 0.613507 | 0.715917 | 0.955078 |
| walking_direction_dtw | 39.0 | 26.079023 | 13.194644 | 4.239983 | 18.846778 | 21.961237 | 32.353181 | 61.422126 |
| speeds_lag | 39.0 | -2.230769 | 6.322314 | -20.000000 | -5.500000 | -2.000000 | 0.000000 | 18.000000 |
| speeds_base_corr | 39.0 | 0.363470 | 0.348324 | -0.407370 | 0.155902 | 0.469160 | 0.592340 | 0.881916 |
| speeds_lagged_corr | 39.0 | 0.650329 | 0.107183 | 0.514895 | 0.559972 | 0.628263 | 0.707393 | 0.881916 |
| speeds_dtw | 39.0 | 23.657810 | 8.449252 | 10.222585 | 17.453590 | 23.802687 | 28.914904 | 42.930466 |
| mean_distance | 39.0 | 2.134111 | 2.067214 | 0.433665 | 1.288185 | 1.775110 | 2.303315 | 13.639054 |
| mean_speed_difference | 39.0 | 0.334382 | 0.104774 | 0.124701 | 0.261519 | 0.326160 | 0.403904 | 0.538114 |
| mean_walking_direction_difference | 39.0 | 51.980670 | 19.681307 | 14.479058 | 39.022264 | 50.174653 | 61.618709 | 100.850062 |
| turn_duration | 39.0 | 7.738462 | 2.589515 | 5.000000 | 5.800000 | 7.000000 | 9.700000 | 13.600000 |
| normalized_walking_direction_dtw | 39.0 | 3.334042 | 1.168407 | 0.731032 | 2.885781 | 3.330639 | 3.800419 | 6.082806 |
| normalized_speeds_dtw | 39.0 | 3.154247 | 0.975970 | 1.588202 | 2.311082 | 3.122505 | 3.858179 | 5.189416 |
| abs_walking_direction_lag | 39.0 | 6.282051 | 8.589923 | 0.000000 | 1.000000 | 2.000000 | 9.000000 | 43.000000 |
| abs_speeds_lag | 39.0 | 4.589744 | 4.843433 | 0.000000 | 1.000000 | 3.000000 | 7.000000 | 20.000000 |
In [ ]:
# plot correlation matrix
corr = filtered_df[relevant_features].corr(method='pearson',numeric_only=True)
# plt.figure(figsize=(10,10))
sns.heatmap(corr, annot=True, fmt=".2f")
plt.show()
In [ ]:
corr[abs(corr) > 0.3]
Out[Â ]:
| turn_duration | mean_distance | walking_direction_lag | abs_walking_direction_lag | walking_direction_dtw | normalized_walking_direction_dtw | walking_direction_lagged_corr | speeds_lag | abs_speeds_lag | speeds_dtw | normalized_speeds_dtw | speeds_lagged_corr | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| turn_duration | 1.000000 | NaN | NaN | 0.349332 | 0.709851 | NaN | NaN | NaN | NaN | 0.530405 | -0.305060 | NaN |
| mean_distance | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| walking_direction_lag | NaN | NaN | 1.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -0.451192 |
| abs_walking_direction_lag | 0.349332 | NaN | NaN | 1.000000 | 0.729586 | 0.608240 | -0.320435 | NaN | NaN | NaN | NaN | NaN |
| walking_direction_dtw | 0.709851 | NaN | NaN | 0.729586 | 1.000000 | 0.740449 | -0.591417 | NaN | NaN | NaN | NaN | NaN |
| normalized_walking_direction_dtw | NaN | NaN | NaN | 0.608240 | 0.740449 | 1.000000 | -0.736542 | NaN | NaN | NaN | NaN | NaN |
| walking_direction_lagged_corr | NaN | NaN | NaN | -0.320435 | -0.591417 | -0.736542 | 1.000000 | NaN | -0.336904 | NaN | NaN | NaN |
| speeds_lag | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN |
| abs_speeds_lag | NaN | NaN | NaN | NaN | NaN | NaN | -0.336904 | NaN | 1.000000 | 0.489237 | 0.558836 | -0.300640 |
| speeds_dtw | 0.530405 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.489237 | 1.000000 | 0.615769 | -0.446811 |
| normalized_speeds_dtw | -0.305060 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.558836 | 0.615769 | 1.000000 | -0.516680 |
| speeds_lagged_corr | NaN | NaN | -0.451192 | NaN | NaN | NaN | NaN | NaN | -0.300640 | -0.446811 | -0.516680 | 1.000000 |
In [ ]:
for feature in relevant_features:
sns.histplot(data=filtered_df, x=feature, kde=True)
plt.show()